/* Legacy numeric form of the "set more" setting.
   NOTE(review): presumably this turns off the more-pause so the do-file
   runs unattended (modern spelling would be "set more off") -- confirm */
set more 1

/*
File:	cepr_org_wages.do
Date:	Nov 21, 2006
	Feb 15, 2008
	Feb 10, 2009
	Jan 4, 2010
	Feb 22, 2011
	Mar 21, 2011 CEPR ORG Version 1.6.1
	Jan 3, 2012 CEPR ORG Version 1.7
	Jan 7, 2012
	Dec 20, 2013
	Mar 12, 2015, CEPR ORG Version 2.0
	Apr  1, 2015, CEPR ORG Version 2.0.1
	March 1, 2016, CEPR ORG Version 2.1
	Oct 12, 2016, CEPR ORG Version 2.1.1
	Feb 9, 2017, CEPR ORG Version 2.2
	Apr 10, 2018, CEPR ORG Version 2.3
	Mar 22, 2019, CEPR ORG Version 2.4
	Jul 24, 2019, CEPR ORG Version 2.4.1
	Feb 05, 2020, CEPR ORG Version 2.5
	
Desc:	Creates consistent wage variables for CEPR extract of CPS ORG;
	for a full discussion of various wage series constructed here,
	see John Schmitt, "Creating a consistent hourly wage series
	from the Current Population Survey's Outgoing Rotation Group,
	1979-2002," Center for Economic and Policy Research, August 2003,
	version 0.92, available at www.cepr.net.
	From Jan 25, 2011, the adjustment for the mean above the top
	code is done separately for all, men, and women
Note:	See copyright notice at end of program.
*/

/* Determine data year */
/* Stores the data year in the local macro "year"; every year-specific
   branch in the rest of this file tests that macro.
   NOTE(review): "in 1" presumably takes the value of the variable year
   from the first observation, which would assume that the data in memory
   cover a single year -- confirm */
local year=year in 1
/* NOTE(review): -17345 is remapped to 1980; where that value comes from
   is not visible in this file -- confirm against the input data */
if `year'==-17345 {local year  = 1980}
/* echo the resolved year to the log */
di "`year'"
	/* 
	   NOTE: this program uses NBER extract for 1979-1993 and Basic 
	   CPS for 1994-present
	*/

/* BLS allocated earnings */
/*
Hirsch and Schumacher report that from 1989-1993, BLS allocation flags are unreliable
and only about 1/4 of allocated earnings are identified by allocation flags.
Also, from Jan 1994-Aug 1995, there are no valid allocation flags.

See Hirsch and Schumacher (2004) "Match Bias in Wage Gap Estimates Due to Earnings Imputation"
for further guidance.
*/
	/* Hourly earnings allocated: blsimph is 1 when the BLS flag marks
	   usual hourly earnings as allocated, 0 when the flag marks them as
	   not allocated, and missing where no flag value is usable */

if inrange(`year', 1979, 1988) {
	/* only code 1 of I25c counts as allocated; any other nonmissing code is 0 */
	gen byte blsimph = (I25c==1) if I25c!=.
}
if inrange(`year', 1989, 1993) {
	/* codes 1 through 8 of I25c count as allocated */
	gen byte blsimph = inrange(I25c, 1, 8) if I25c!=.
}
if `year'==1994 {
	gen byte blsimph=. /* prhernal missing in cps basic 1994 */
}
if `year'==1995 {
	/* prhernal missing Jan-Aug 1995, so only Sep-Dec rows get a value */
	gen byte blsimph = prhernal if inrange(month, 9, 12) & inlist(prhernal, 0, 1)
}
if inrange(`year', 1996, 2020) {
	/* copy the 0/1 flag; any other prhernal value stays missing */
	gen byte blsimph = prhernal if inlist(prhernal, 0, 1)
}

label variable blsimph "BLS allocated hourly earnings"
notes blsimph: Indicates BLS allocated usual hourly earnings
notes blsimph: CPS: I25c, prhernal
notes blsimph: BLS provides no allocation info Jan 94-Aug 95
notes blsimph: According to Hirsch & Schumacher (2004), allocation flags/*
	      */ unreliable 1989-1993
notes blsimph: For 1989-1993, underlying data don't use complete range

	/* Weekly earnings allocated: blsimpw is 1 when the BLS flag marks
	   usual weekly earnings as allocated, 0 when the flag marks them as
	   not allocated, and missing where no flag value is usable */

if inrange(`year', 1979, 1988) {
	/* only code 1 of I25d counts as allocated; any other nonmissing code is 0 */
	gen byte blsimpw = (I25d==1) if I25d!=.
}
if inrange(`year', 1989, 1993) {
	/* codes 1 through 8 of I25d count as allocated */
	gen byte blsimpw = inrange(I25d, 1, 8) if I25d!=.
}
if `year'==1994 {
	gen byte blsimpw=. /* prwernal missing in cps basic 1994 */
}
if `year'==1995 {
	/* prwernal missing Jan-Aug 1995, so only Sep-Dec rows get a value */
	gen byte blsimpw = prwernal if inrange(month, 9, 12) & inlist(prwernal, 0, 1)
}
if inrange(`year', 1996, 2020) {
	/* copy the 0/1 flag; any other prwernal value stays missing */
	gen byte blsimpw = prwernal if inlist(prwernal, 0, 1)
}

label variable blsimpw "BLS allocated weekly earnings"
notes blsimpw: Indicates BLS allocated usual weekly earnings
notes blsimpw: CPS: I25d, prwernal
notes blsimpw: BLS provides no allocation info Jan 94-Aug 95
notes blsimpw: According to Hirsch & Schumacher (2004), allocation flags/*
	      */ unreliable 1989-1993

/* Paid by the hour indicator variable: paidhre is 1 for "paid by the
   hour" and 0 otherwise */

if inrange(`year', 1979, 1993) {
	/* paidhre already exists in these years; only code 2 is recoded to 0 */
	recode paidhre (2=0)
}
if inrange(`year', 1994, 2020) {
	/* peernhry: 1 becomes 1, 2 becomes 0, anything else stays missing */
	gen paidhre = (peernhry==1) if inlist(peernhry, 1, 2)
}
label variable paidhre "Paid by hour"
notes paidhre: Indicates BLS records respondent's earnings by hour
notes paidhre: Not a consistent indicator of "hourly worker" status
notes paidhre: CPS: derived from a-hrlywk, peernhry

/* wage1: hourly earnings, filled only for rows with paidhre==1.
   The source variable is recorded in pennies and is divided by 100. */

gen wage1=.

if inrange(`year', 1979, 1993) {
	replace wage1 = earnhre/100 if paidhre==1
}
if inrange(`year', 1994, 2020) {
	/* negative prernhly values are left missing rather than converted */
	replace wage1 = prernhly/100 if paidhre==1 & prernhly>=0
}
label variable wage1 "Hourly wage (if hourly worker)"
notes wage1: Dollars per hour
notes wage1: For hourly workers only
notes wage1: Excludes overtime, tips, commissions
notes wage1: Top-code 1979-84: 99.00
notes wage1: Top-code 1985-98: set so that hours worked times earnhre /*
*/ < weekly earnings top-code
notes wage1: Top-code 1998-: set so that hours worked times earnhre /*
*/ < 1,998, which is less than the weekly earnings top-code of 2,884 /*
*/ [BLS documentation; check]
notes wage1: Top-code 1985-: NBER states that top-code not applied /* 
*/ to all observations
notes wage1: Bottom-code: 1979-88: 0.50; 1994: 0.10; 1995: 0.20
notes wage1: set to missing if prernhly<0 to get rid of neg values
notes wage1: CPS: a-herntp, prernhly, pternhly

/* Usual weekly earnings including overtime, tips, commissions 
   nonhourly workers (paidhre==0) and hourly workers (paidhre==1)
*/
	
/* weekpay starts out missing for every row; the year-specific branches
   that follow fill it in */
gen weekpay=.

/* The NBER extract contains three usual weekly earnings variables, 
which we include here for 1979-1993.

The first two are uearnwk ("unedited," available 1979-1993) and uearnwke 
("edited," available 1979-1988 only). For *hourly* workers, these variables
give the usual weekly earnings *including* overtime, tips, and commissions. 
Between 1989 and 1993, when uearnwke is not available, few observations on 
hourly workers show uearnwk greater than the product of earnhre (usual 
hourly pay) times uhourse (usual weekly hours), suggesting that the CPS may 
not have reliably captured the overtime, tips, and commissions received by 
hourly workers between 1989 and 1993. 

For hourly workers in 1979-1988, the CEPR extract uses uearnwke for usual 
weekly earnings including overtime, tips, and commissions. For hourly workers
1989-1993, the CEPR extract uses uearnwk, which does not appear to capture
overtime, tips, and commissions well (see the preceding paragraph).

The third variable is earnwke ("edited," available 1979-1993). For hourly
workers, this variable contains the product of earnhre (usual hourly pay) 
times uhourse (usual weekly hours); so, by definition, it excludes overtime,
tips, and commissions for hourly workers. For nonhourly workers, earnwke 
does include overtime, tips, and commissions.

*/
	
/* 1979-1988: nonhourly rows take earnwke, hourly rows take uearnwke */
if 1979<=`year' & `year'<=1988 {
/* negative uearnwk values are set to missing in place */
replace uearnwk=. if uearnwk<0
replace weekpay=earnwke if paidhre==0
replace weekpay=uearnwke if paidhre==1
/* NOTE(review): the negative-value filter tests earnwke for every row,
   including hourly rows whose weekpay was just taken from uearnwke;
   confirm that this is intended rather than a test on weekpay itself */
replace weekpay=. if earnwke<0
}
/* 1989-1993: nonhourly rows take earnwke, hourly rows take uearnwk */
if 1989<=`year' & `year'<=1993 {
/* negative uearnwk values are set to missing before they are used below */
replace uearnwk=. if uearnwk<0
/* uearnwke is created as an all-missing placeholder in these years */
gen byte uearnwke=.
/* NOTE(review): unlike the 1979-1988 branch, no negative-value filter is
   applied to weekpay here */
replace weekpay=earnwke if paidhre==0
replace weekpay=uearnwk if paidhre==1 /* note shift from uearnwke to uearnwk */
}
/* 1994-2020: hourly and nonhourly rows both take prernwa */
if 1994<=`year' & `year'<=2020 {
/* the three NBER-style variables are created as all-missing placeholders
   so that the labels and notes below apply in every year */
gen byte uearnwk=.
gen uearnwke=.
gen earnwke=.
replace weekpay=prernwa/100 if paidhre==0 /* convert from pennies to dollars */
replace weekpay=prernwa/100 if paidhre==1 
/* negative prernwa values are treated as missing */
replace weekpay=. if prernwa<0
}
/* labels and notes for the three NBER-style variables and for weekpay */
lab var uearnwk "Weekly pay"
notes uearnwk: Dollars per week
notes uearnwk: Includes overtime, tips, commissions
notes uearnwk: Intended for hourly workers only
notes uearnwk: Available 1979-93 only
notes uearnwk: Top-code: 1979-88: 999; 1989-93: 1999
notes uearnwk: set to missing if neg value
notes uearnwk: CPS: a$grwkel1-4; prernwa

lab var uearnwke "Weekly pay"
notes uearnwke: Dollars per week
notes uearnwke: Includes overtime, tips, commissions
notes uearnwke: Intended for hourly workers only
notes uearnwke: Available 1979-88 only
notes uearnwke: Top-code: 1999
notes uearnwke: CPS: a-brswk (locations 427-429)

lab var earnwke "Weekly pay"
notes earnwke: Dollars per week
notes earnwke: Includes overtime, tips, commissions
notes earnwke: Intended for nonhourly workers
notes earnwke: For paidhre=1: earnhre*uhourse; for paidhre=0: uearnwke
notes earnwke: Top-code: 1979-88: 999; 1989-97: 1923; 1998-: 2884
notes earnwke: CPS: a-werntp, prernwa, pternwa
notes earnwke: CPS: 1979-88: location 417-419

lab var weekpay "Weekly pay"
notes weekpay: Dollars per week
notes weekpay: For nonhourly and hourly workers
notes weekpay: Includes overtime, tips, commissions
notes weekpay: Top-code: 1979-88: 999; 1989-97: 1923; 1998-: 2884
notes weekpay: CPS 1994-: prernwa
notes weekpay: CPS 1979-88: location 417-419

/* wage2: usual hourly earnings for nonhourly workers (paidhre==0),
   computed as usual weekly earnings divided by usual weekly hours;
   includes overtime, tips, commissions
*/

gen wage2=.

/* choose the usual-hours variable that matches the data year */
local usualhrs
if inrange(`year', 1979, 1993) local usualhrs uhourse
if inrange(`year', 1994, 2020) local usualhrs pehrusl1

if "`usualhrs'" != "" {
	replace wage2 = weekpay/`usualhrs' if paidhre==0
	/* negative results are discarded */
	replace wage2 = . if wage2<0
}
label variable wage2 "Hourly wage (if nonhourly worker)"
notes wage2: Dollars per hour
notes wage2: For nonhourly workers only
notes wage2: Includes overtime, tips, commissions
notes wage2: Usual weekly earnings / usual weekly hours
notes wage2: 1979-1993: weekpay/uhourse; 1994-present: weekpay/pehrusl1
notes wage2: CPS top code weekly earnings in `year': $topcode

/* wage3: NBER-style usual hourly earnings.
   Hourly workers (paidhre==1) take wage1, which EXcludes overtime,
   tips, commissions; nonhourly workers (paidhre==0) take wage2, which
   INcludes them. Rows with any other paidhre value stay missing.
*/

gen wage3 = cond(paidhre==1, wage1, wage2) if inlist(paidhre, 0, 1)

label variable wage3 "Hourly wage"
notes wage3: Dollars per hour
notes wage3: For hourly and nonhourly workers
notes wage3: Approximates NBER's recommended wage variable
notes wage3: Includes overtime, tips, commissions for nonhourly
notes wage3: Excludes overtime, tips, commissions for hourly
notes wage3: No adjustments for top-coding
notes wage3: No trimming of outliers
notes wage3: Excludes nonhourly workers whose usual hours vary

/* otcrec: hourly workers who usually receive overtime, tips, commissions.
   Reported directly (peernuot) from 1994; before 1994 it is inferred by
   comparing the hourly rate times usual hours with weekly earnings. */

gen byte otcrec=.

if inrange(`year', 1979, 1993) {
	/* peernuot is created as an all-missing placeholder in these years */
	gen byte peernuot=.
	/* weekly earnings comparison variable: uearnwke through 1988,
	   uearnwk from 1989 */
	local wkearn uearnwke
	if `year'>=1989 local wkearn uearnwk
	replace otcrec = 0 if paidhre==1 & wage1!=.
	/* flag rows where weekly earnings exceed hourly rate times hours */
	replace otcrec = 1 if paidhre==1 & (wage1*uhourse)<`wkearn' & `wkearn'!=.
}
if inrange(`year', 1994, 2020) {
	/* peernuot: 1 becomes 1, 2 becomes 0, anything else stays missing */
	replace otcrec = (peernuot==1) if paidhre==1 & inlist(peernuot, 1, 2)
}
label variable otcrec "Usually receive overtime, tips, commissions"
notes otcrec: Hourly workers only
notes otcrec: Different methodology 1979-1993 versus 1994-present
notes otcrec: CPS: Derived from peernuot

/* otcamt: weekly earnings from overtime, tips, commissions, filled only
   for rows with otcrec==1. Taken from peern (divided by 100) from 1994;
   before 1994 it is weekly earnings minus hourly rate times usual hours,
   rounded to a whole number. */

gen byte otcamt=.

if inrange(`year', 1979, 1993) {
	/* weekly earnings variable: uearnwke through 1988, uearnwk from 1989 */
	local wkearn uearnwke
	if `year'>=1989 local wkearn uearnwk
	replace otcamt = round(`wkearn'-(wage1*uhourse), 1) if otcrec==1
}
if inrange(`year', 1994, 2020) {
	replace otcamt = peern/100 if otcrec==1
}
/* negative amounts are discarded; this changes nothing when no branch
   above ran, because otcamt is then still all missing */
replace otcamt = . if otcamt<0

format otcamt %5.0f /* format to no decimal places */
label variable otcamt "Weekly earnings overtime, tips, commissions"
notes otcamt: Hourly workers only
notes otcamt: Different methodology 1979-1993 versus 1994-present
notes otcamt: CPS: derived from peern

/* Usual hourly earnings
   including overtime, tips, commissions for 
   hourly and nonhourly workers
*/

/* wage4 starts out missing; each year branch below fills in hourly rows
   first and then copies wage3 for nonhourly rows. The order of the
   replace statements matters: later ones override earlier ones. */
gen wage4=.

if 1979<=`year' & `year'<=1988 {
/* hourly: weekly pay divided by usual weekly hours */
replace wage4=weekpay/uhourse if paidhre==1
replace wage4=wage3 if (wage4<wage3 & wage3~=.) & paidhre==1
/* prevents wage including overtime, tips, and commissions
from being less than wage excluding overtime, tips, and
commissions
*/
/* nonhourly: same value as wage3 */
replace wage4=wage3 if paidhre==0
}
/* uearnwk in NBER extract described as:
"Item 25d. Earnings per week...includes overtime tips
and commissions. Use this field (or uearnwke) for 
hourly workers."
Available 1979-1993
Range; 0-999 for 1979-1988; 0-1,999 for 1989-1993 
*/
if 1989<=`year' & `year'<=1993 {
/* hourly: weekly pay divided by usual weekly hours */
replace wage4=weekpay/uhourse if paidhre==1
/* note change to uearnwk here relative to 1979-1988 */
replace wage4=wage3 if (wage4<wage3 & wage3~=.) & paidhre==1
/* prevents wage including overtime, tips, and commissions
from being less than wage excluding overtime, tips, and
commissions */
/* nonhourly: same value as wage3 */
replace wage4=wage3 if paidhre==0
}
/* uearnwke in NBER extract described as:
"Edited Item 25d. Earnings per week...Include[s] any overtime
pay, commissions, or tips usually received Source: locations 
427-429 on the BLS tape."
Available 1979-1988 only.
Range: 0-1,999.
[This variable seems preferable to uearnwk, but is
only available for 1979-1988. Through weekpay, this program
uses uearnwke for hourly workers in 1979-1988 and uearnwk in
1989-1993, so the source is NOT the same across the full
1979-1993 period.]
*/
if 1994<=`year' & `year'<=2020 {
	 /* for hourly */
/* start from the hourly rate without overtime, tips, commissions */
replace wage4=wage1 if paidhre==1
/* About one-fourth of hourly workers report wages at 
other periodicities (weekly, monthly, etc.); these workers are not asked
to provide peernhro, which we use to calculate the wage including
overtime, tips, and commissions for the rest of hourly workers. For the
subset of hourly workers without a 
valid peernhro, we estimate hourly earnings including overtime, tips, 
and commissions by dividing weekly earnings (prernwa) by usual hours worked 
(pehrusl1) */
/* applied only where the weekly-based figure exceeds wage1 */
replace wage4=(weekpay/pehrusl1) if paidhre==1 & /*
*/ (wage1<(weekpay/pehrusl1) & (weekpay/pehrusl1)~=.)

/* For hourly workers with information on peernhro, we use that
information to calculate wages with overtime, tips, and commissions. */

/* this overrides the weekly-based figure wherever otcamt is positive and
   peernhro is in the range 1-99 */
replace wage4=wage1+(otcamt/peernhro) if paidhre==1 & /*
*/ otcrec==1 & (0<otcamt & otcamt~=.) & (0<peernhro & peernhro<=99)
replace wage4=. if wage4<0
	/* for nonhourly */
replace wage4=wage3 if paidhre==0
}
lab var wage4 "Hourly wage"
notes wage4: Dollars per hour
notes wage4: For hourly and nonhourly workers
notes wage4: Includes overtime, tips, commissions for nonhourly and hourly
notes wage4: Covers only hourly workers who report hourly rate of pay
notes wage4: No adjustments for top-coding
notes wage4: No trimming of outliers
notes wage4: Excludes nonhourly workers whose usual hours vary
notes wage4: Consistent 1994-2020, experimental 1979-1993

/* rw : 2020 version of CEPR's preferred variable for analyzing 1979-2020
	excludes overtime, tips, commissions for hourly workers
	includes overtime, tips, commissions for nonhourly workers

      Based on wage3 with following adjustments:
      
      Real 2019 $:   Using CPI-U-RS 
      Top-coding:   Top-coded weekly earnings recoded to estimated mean above 
		    the top-code based on lognormal function, separately by 
		    gender
      "Hours vary": Includes observations with imputations for workers who
		    report that their weekly "hours vary"
      Trimmed:      excludes observations where real 1989 hourly wage 
		    <$0.50 or >$200
*/

	/* load real wage program */
	/* NOTE(review): the realw program and the globals matclnm, matclnf,
	   dol_b and dol_t used below are not defined in this file; presumably
	   cepr_org_realwage.do provides them -- confirm */
cd "$do"
do cepr_org_realwage.do


/* w_ln_no: nominal hourly wage that feeds rw; it starts from wage3, so
   hourly workers carry wage1 (no overtime, tips, commissions) */
gen w_ln_no=wage3 /* = wage1 if hourly; wage2 if nonhourly */
  /* include those whose "hours vary" 1994- */
  /* note: this condition tests the dataset variable year, not the local macro */
replace w_ln_no=weekpay/uhoursi if paidhre==0 & (1994<=year & year<=2020)
replace w_ln_no=. if w_ln_no<0
  /* top-coding: top-coded nonhourly rows (tc==1) take the gender-specific
     global divided by usual hours */
replace w_ln_no=$matclnm/uhoursi if paidhre==0 & tc==1 & female==0
replace w_ln_no=$matclnf/uhoursi if paidhre==0 & tc==1 & female==1
  /* real 2019 dollars, trimmed */
  /* realw is expected to leave its result in tw_ln_no, which is read
     immediately below and then dropped */
realw w_ln_no
gen rw=tw_ln_no
replace rw=. if tw_ln_no<$dol_b
replace rw=. if $dol_t<tw_ln_no
drop tw_ln_no
lab var rw "Real wage, 2019$"
/* FIX: rw is built from wage3, which excludes overtime, tips, commissions
   for hourly workers; the note previously said "includes" */
notes rw: For hourly workers, excludes overtime, tips, commissions
notes rw: For nonhourly workers, includes overtime, tips, commissions
notes rw: Includes weekly workers whose hours vary (usual hours imputed)
notes rw: Converted to real wage using CPI-U-RS
/* dollar signs are backslash-escaped so that the amounts are stored
   literally instead of being parsed as global macro references */
notes rw: Values less than \$0.50 or greater than \$200 (in 1989\$) set to missing
notes rw: Estimated mean above topcode differs by gender
		    
/* rw_ot : 2020 version of CEPR's preferred variable for analyzing 1994-2020
	includes overtime, tips, commissions for hourly workers
	includes overtime, tips, commissions for nonhourly workers

      Based on wage4 with following adjustments:
      
      Real 2019$:   Using CPI-U-RS 
      Top-coding:   Top-coded weekly earnings recoded to estimated mean above 
		    the top-code based on lognormal function, separately by 
		    gender
      "Hours vary": Includes observations with imputations for workers who
		    report that their weekly "hours vary"
      Trimmed:      excludes observations where real 1989 hourly wage 
		    <$0.50 or >$200
*/

/* w_ln_ot: nominal hourly wage that feeds rw_ot; it starts from wage4,
   which includes overtime, tips, commissions for hourly workers */
gen w_ln_ot=wage4 

if 1994<=`year' & `year'<=2020 {
/* include those whose "hours vary" 1994- */
    /* nonhourly */
replace w_ln_ot=weekpay/uhoursi if paidhre==0
    /* hourly */
/* About one-fourth of hourly workers report wages at 
other periodicities (weekly, monthly, etc.); these workers are not asked
to provide peernhro, which we use to calculate otc wage for the
rest of hourly workers. For the subset of hourly workers without a 
valid peernhro, we estimate hourly earnings including overtime, tips, 
and commissions by dividing weekly earnings (prernwa) by usual hours worked 
(uhoursi) */
replace wage4=(weekpay/uhoursi) if hrsvary==1 & paidhre==1 & /*
*/ (wage1<(weekpay/uhoursi) & (weekpay/uhoursi)~=.)
/* For hourly workers with information on peernhro, we use that
information to calculate wages with overtime, tips, and commissions. */
replace wage4=wage1+(otcamt/peernhro) if paidhre==1 & /*
*/ otcrec==1 & (0<otcamt & otcamt~=.) & (0<peernhro & peernhro<=99)
/* FIX: w_ln_ot was copied from wage4 before the two hourly adjustments
   above were made, so they never reached w_ln_ot or rw_ot. Carry the
   adjusted hourly values across; nonhourly rows keep the weekpay/uhoursi
   value assigned at the top of this branch. wage4 itself is left exactly
   as the original code leaves it.
   NOTE(review): this changes rw_ot for hourly workers whose hours vary;
   confirm against the intended methodology before re-releasing data */
replace w_ln_ot=wage4 if paidhre==1
}
replace wage4=. if wage4<0
replace w_ln_ot=. if w_ln_ot<0
  /* top-coding: top-coded nonhourly rows (tc==1) take the gender-specific
     global divided by usual hours */
replace w_ln_ot=$matclnm/uhoursi if paidhre==0 & tc==1 & female==0
replace w_ln_ot=$matclnf/uhoursi if paidhre==0 & tc==1 & female==1
  /* real 2019 dollars, trimmed */
  /* realw is expected to leave its result in tw_ln_ot, which is read
     immediately below and then dropped */
realw w_ln_ot
gen rw_ot=tw_ln_ot
replace rw_ot=. if tw_ln_ot<$dol_b
replace rw_ot=. if $dol_t<tw_ln_ot
drop tw_ln_ot
lab var rw_ot "Real wage, 2019$"
/* FIX: rw_ot is built from wage4, which includes overtime, tips,
   commissions for hourly workers; the note previously said "excludes" */
notes rw_ot: For hourly workers, includes overtime, tips, commissions
notes rw_ot: For nonhourly workers, includes overtime, tips, commissions
notes rw_ot: Includes weekly workers whose hours vary (usual hours imputed)
notes rw_ot: Converted to real wage using CPI-U-RS
/* dollar signs are backslash-escaped so that the amounts are stored
   literally instead of being parsed as global macro references */
notes rw_ot: Values less than \$0.50 or greater than \$200 (in 1989\$) set to missing
notes rw_ot: Estimated mean above topcode differs by gender

* format to two decimals
format w_* rw* %5.2f
			
/* 16. Whether labor force data were self-reported or collected by proxy */

if inrange(`year', 1979, 1993) {
	/* no source variable is read for these years: all-missing placeholder */
	gen byte proxy=.
}
if inrange(`year', 1994, 2020) {
	/* keep codes 1-3 of puslfprx; any other value stays missing */
	gen byte proxy = puslfprx if inlist(puslfprx, 1, 2, 3)
}
label variable proxy "Self or proxy response"
label define proxy 1 "Self" 2 "Proxy" 3 "Proxy and Self"
label values proxy proxy
notes proxy: Labor force information collected by self or proxy
notes proxy: CPS: puslfprx

/* wholine: line number of the respondent */
if inrange(`year', 1979, 1993) {
	/* no source variable is read for these years: all-missing placeholder */
	gen byte wholine=.
}
if inrange(`year', 1994, 2020) {
	/* copy hurespli when it lies in 0-99; other values stay missing */
	gen byte wholine = hurespli if inrange(hurespli, 0, 99)
}
label variable wholine "Line number of respondent"
notes wholine: Can be used to identify respondent (self or proxy)
notes wholine: CPS: hurespli

/* reltoref: relationship to the reference person */
if inrange(`year', 1979, 1993) {
	/* no source variable is read for these years: all-missing placeholder */
	gen byte reltoref=.
}
if inrange(`year', 1994, 2020) {
	/* copy perrp when it lies in 1-18; other values stay missing */
	gen byte reltoref = perrp if inrange(perrp, 1, 18)
}
label variable reltoref "Relationship to reference person"
/* value labels, written with line continuation instead of a delimiter switch */
label define reltoref ///
	1 "Reference person w/ relatives" ///
	2 "Reference person w/o relatives" ///
	3 "Spouse" ///
	4 "Child" ///
	5 "Grandchild" ///
	6 "Parent" ///
	7 "Sibling" ///
	8 "Other relative" ///
	9 "Foster child" ///
	10 "Nonrelative" ///
	11 "Partner/roommate" ///
	12 "Nonrelative" ///
	13 "Unmarried partner w/ relatives" ///
	14 "Unmarried partner w/o relatives" ///
	15 "Housemate w/ relatives" ///
	16 "Housemate w/o relatives" ///
	17 "Boarder w/ relatives" ///
	18 "Boarder w/o relatives"
/* For Jan 1994-Feb 1995, only takes values 1-12; 13-18 begin Mar 1995.
From Mar 1995 on, 11 (partner/roommate) is no longer used */

label values reltoref reltoref
notes reltoref: Jan 1994-Feb 1995: range is 1-12
notes reltoref: Mar 1995- 13-18 added
notes reltoref: Before Mar 1995, 11 is Partner/Roommate; after, not used
notes reltoref: CPS perrp

/* 
Copyright 2020 CEPR and John Schmitt

This file is part of the cepr_org_master.do program. This file and all
programs referenced in it are free software. You can redistribute the
program or modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
USA.
*/
